#drop rows with NA for vaccine percentagemmr_cov <- mmr_cov %>%drop_na(estimate_pct)#create a column with percent cast to numericmmr_cov$num_pct <- mmr_cov$estimate_pctmmr_cov$num_pct <-as.numeric(sub("%","",mmr_cov$num_pct))/100
#summary statistics for vaccine coverage, per stategroup_by(mmr_cov, geography) %>%get_summary_stats(num_pct)
#plot median vaccination rate by yearsumm_year %>%tail(10) %>%ggplot( aes(x=school_year, y=median)) +geom_line(color="black") +geom_point()
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
Median vaccination rates among states in the USA appear to be showing a downward trend starting from the 2019-20200 school year. We will now look at the trajectories for the states with the lowest vaccination rates in 2023-2024.
geography school_year estimate_pct population_size percent_surveyed
1 Idaho 2023-24 79.6% 22376 100.0
2 Alaska 2023-24 84.3% 8644 88.9
3 Wisconsin 2023-24 84.8% 62028 98.2
4 Minnesota 2023-24 87.0% 66032 99.1
5 Florida 2023-24 88.1% 228213 100.0
6 Colorado 2023-24 88.3% 61662 100.0
survey_type categories num_pct
1 Census Less than 90% 0.796
2 Census (pub.), Not Conducted (prv.) Less than 90% 0.843
3 Census Less than 90% 0.848
4 Census Less than 90% 0.870
5 Census Less than 90% 0.881
6 Census Less than 90% 0.883
The states with the five lowest vaccination rates are Idaho (79.6%), Alaska (84.3%), Wisconsin (84.8%), Minnesota (87.0%), and Florida (88.1%).
geography school_year estimate_pct population_size percent_surveyed
1 Idaho 2009-10 87.0% 22624 100.0
2 Florida 2009-10 91.3% 218630 100.0
3 Wisconsin 2009-10 94.2% 61095 2.3
4 Minnesota 2009-10 95.1% 70653 100.0
survey_type categories num_pct
1 Census Less than 90% 0.870
2 Census 90-94.9% 0.913
3 Random Sample 90-94.9% 0.942
4 Census 95%+ 0.951
Vaccination rates for these states were notably higher in 2009-10 (excepting Alaska, which did not report its vaccination rate that year.)
mmr_cov_low <- mmr_cov %>%filter(geography %in% low_2023)ggplot(mmr_cov_low, aes(geography, num_pct, fill = geography)) +geom_boxplot() +geom_jitter(width =0.2) +guides(fill ="none") +labs(x ="", y ="Vaccination Rate",title ="Vaccination Variation by State (2023-2024)",subtitle ="States with Lowest Vaccination Rates" ) +theme(plot.title =element_text(hjust =0.5, size =14, face ="bold"),plot.subtitle =element_text(hjust =0.5, size =12))
Wisconsin and Alaska show the greatest variation in vaccination rates, of the five states with the lowest rates in 2023-24.
#at this point we will add a numeric column for the dates.mmr_cov$num_years <-substr(mmr_cov$school_year, 1, 4)mmr_cov$num_years <-as.numeric(mmr_cov$num_years)head(mmr_cov)
geography school_year estimate_pct population_size percent_surveyed
1 Alabama 2023-24 93.8% 54565 100.0
2 Alaska 2023-24 84.3% 8644 88.9
3 Arizona 2023-24 89.3% 74834 99.6
4 Arkansas 2023-24 92.5% 37535 95.4
5 California 2023-24 96.2% 569680 100.0
6 Colorado 2023-24 88.3% 61662 100.0
survey_type categories num_pct num_years
1 Census 90-94.9% 0.938 2023
2 Census (pub.), Not Conducted (prv.) Less than 90% 0.843 2023
3 Census Less than 90% 0.893 2023
4 Census (pub.), Vol. Response (prv.) 90-94.9% 0.925 2023
5 Census 95%+ 0.962 2023
6 Census Less than 90% 0.883 2023
#fitting linear mixed effects model.#this is inaccurate, just for practice with lmer for now.library(lme4)
Warning: package 'lme4' was built under R version 4.4.2
Loading required package: Matrix
Attaching package: 'Matrix'
The following objects are masked from 'package:tidyr':
expand, pack, unpack
Attaching package: 'lme4'
The following object is masked from 'package:fabletools':
refit
lin_0 <-lmer(num_years ~1+ (1| geography), data = mmr_cov)
boundary (singular) fit: see help('isSingular')
summary(lin_0)
Linear mixed model fit by REML ['lmerMod']
Formula: num_years ~ 1 + (1 | geography)
Data: mmr_cov
REML criterion at convergence: 3861.8
Scaled residuals:
Min 1Q Median 3Q Max
-1.85230 -0.88142 0.08946 0.81762 1.54578
Random effects:
Groups Name Variance Std.Dev.
geography (Intercept) 0.00 0.00
Residual 16.97 4.12
Number of obs: 681, groups: geography, 53
Fixed effects:
Estimate Std. Error t value
(Intercept) 2016.6314 0.1579 12773
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
library(Epi)
Attaching package: 'Epi'
The following object is masked from 'package:lme4':
factorize
ci.lin(lin_0)
Estimate StdErr z P 2.5% 97.5%
(Intercept) 2016.631 0.157878 12773.35 0 2016.322 2016.941
mmr_cov %>%filter(geography %in% low_2023) %>%ggplot(aes(school_year, num_pct, color = geography, group = geography)) +geom_line() +geom_point() +labs(title ="Trends in Vaccination Rates (2015-2024)",y ="Vaccination rate", x ="Year")
state_vax <- mmr_cov %>%filter(school_year =="2023-24") %>%group_by(state = geography) %>%summarize(mean_vax =mean(num_pct))plot_usmap(data = state_vax, values ="mean_vax", color ="white") +scale_fill_continuous(low ="red", high ="green", name ="Vaccination %") +labs(title ="Measles Vaccination Rates by State, 2023") +theme(legend.postion ="right")
Warning in plot_theme(plot): The `legend.postion` theme element is not defined
in the element hierarchy.
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
ts_vax <- mmr_cov %>%filter(geography %in% low_2023) %>%mutate(year =as.integer(substr(school_year, 1, 4)) ) %>%group_by(year) %>%summarize(mean_vax =mean(num_pct, na.rm =TRUE)) %>%ungroup() %>%# explicitly fill any missing year between min and maxcomplete(year =seq(min(year), max(year), by =1)) %>%as_tsibble(index = year)# now every year in the span is present (missing mean_vax will be NA)fit <- ts_vax %>%model(ETS(mean_vax))
Warning: 1 error encountered for ETS(mean_vax)
[1] ETS does not support missing values.
fc <-forecast(fit, h =2)autoplot(ts_vax, mean_vax) +autolayer(fc, .mean) +labs(title ="Forecasted Vaccination Rates (2025–2026)",x ="Year",y ="Mean Vaccination Rate" )
Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
-Inf
Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
-Inf
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_line()`).
# build a plain ts-series with frequency = 1 (annual)vax_ts <-ts(data = ts_vax$mean_vax,start =min(ts_vax$year),frequency =1)fc <-forecast(ets(vax_ts), h =2)
Warning in ets(vax_ts): Missing values encountered. Using longest contiguous
portion of time series
library(tsibble)library(fable)library(dplyr)library(tidyr)ts_vax_fable <- mmr_cov %>%filter(geography %in% low_2023) %>%mutate(year =as.integer(substr(school_year,1,4))) %>%group_by(year) %>%summarize(mean_vax =mean(num_pct, na.rm =TRUE)) %>%ungroup() %>%complete(year =seq(min(year), max(year), by =1)) %>%as_tsibble(index = year) %>%# simple linear interpolation for the missing yearsmutate(mean_vax =approx(year[!is.na(mean_vax)], mean_vax[!is.na(mean_vax)],xout = year)$y)fit_fbl <- ts_vax_fable %>%model(ETS(mean_vax))fc_fbl <-forecast(fit_fbl, h =2)autoplot(fc_fbl) +labs(title ="Forecasted Vaccination Rates (2025–2026)")
library(dplyr)library(tsibble)library(tidyr)library(fable)library(ggplot2)# 1) Build one annual series per state, and fill implicit gapsts_states <- mmr_cov %>%mutate(year =as.integer(substr(school_year, 1, 4))) %>%group_by(geography, year) %>%summarize(mean_vax =mean(num_pct, na.rm =TRUE), .groups="drop") %>%as_tsibble(key = geography, index = year) %>%# make every year explicit (2009:2023), with NA where missingfill_gaps() %>%# simple linear interpolation of those NAs (you could also choose carry-forward, etc.)group_by_key() %>%mutate(mean_vax =approx(x = year[!is.na(mean_vax)],y = mean_vax[!is.na(mean_vax)],xout = year,rule =2 )$y ) %>%ungroup()# 2) Fit ETS to each statemodels <- ts_states %>%model(ETS =ETS(mean_vax))# 3) Forecast two years aheadfc_states <- models %>%forecast(h =2)autoplot(fc_states) +labs(title ="Vaccination Trends (2009–2023) and Forecast (2025–26)",x ="Year",y ="Mean Vaccination Rate" ) +facet_wrap(~ geography) +theme_minimal()
# A tibble: 52 × 3
geography year point
<chr> <dbl> <dbl>
1 Alabama 2025 0.928
2 Alaska 2025 0.843
3 Arizona 2025 0.879
4 Arkansas 2025 0.925
5 California 2025 0.962
6 Colorado 2025 0.882
7 Connecticut 2025 0.977
8 Delaware 2025 0.938
9 District of Columbia 2025 0.854
10 Florida 2025 0.841
# ℹ 42 more rows
Adding graphs for how each region has changed from 2009 to 2023.
#at this point we will add a numeric column for the dates.mmr_cov$first_year <-substr(mmr_cov$school_year, 1, 4)mmr_cov$num_years <-as.numeric(mmr_cov$first_year)head(mmr_cov)
geography school_year estimate_pct population_size percent_surveyed
1 Alabama 2023-24 93.8% 54565 100.0
2 Alaska 2023-24 84.3% 8644 88.9
3 Arizona 2023-24 89.3% 74834 99.6
4 Arkansas 2023-24 92.5% 37535 95.4
5 California 2023-24 96.2% 569680 100.0
6 Colorado 2023-24 88.3% 61662 100.0
survey_type categories num_pct num_years
1 Census 90-94.9% 0.938 2023
2 Census (pub.), Not Conducted (prv.) Less than 90% 0.843 2023
3 Census Less than 90% 0.893 2023
4 Census (pub.), Vol. Response (prv.) 90-94.9% 0.925 2023
5 Census 95%+ 0.962 2023
6 Census Less than 90% 0.883 2023
first_year
1 2023
2 2023
3 2023
4 2023
5 2023
6 2023
#rates for new englandmmr_cov %>%filter(geography %in% new_england) %>%ggplot(aes(first_year, num_pct, color = geography, group = geography)) +geom_line() +geom_point() +labs(title ="Trends in Vaccination Rates (2009-2024): New England",y ="Vaccination rate", x ="Year")
#rates for middle atlanticmmr_cov %>%filter(geography %in% middle_atlantic) %>%ggplot(aes(first_year, num_pct, color = geography, group = geography)) +geom_line() +geom_point() +labs(title ="Trends in Vaccination Rates (2009-2024): Middle Atlantic",y ="Vaccination rate", x ="Year")
#rates for east north centralmmr_cov %>%filter(geography %in% east_north_central) %>%ggplot(aes(first_year, num_pct, color = geography, group = geography)) +geom_line() +geom_point() +labs(title ="Trends in Vaccination Rates (2009-2024: East North Central)",y ="Vaccination rate", x ="Year")
mmr_cov %>%filter(geography %in% west_north_central) %>%ggplot(aes(first_year, num_pct, color = geography, group = geography)) +geom_line() +geom_point() +labs(title ="Trends in Vaccination Rates (2009-2024): West North Central",y ="Vaccination rate", x ="Year")
mmr_cov %>%filter(geography %in% south_atlantic) %>%ggplot(aes(first_year, num_pct, color = geography, group = geography)) +geom_line() +geom_point() +labs(title ="Trends in Vaccination Rates (2009-2024): South Atlantic",y ="Vaccination rate", x ="Year")
mmr_cov %>%filter(geography %in% east_south_central) %>%ggplot(aes(first_year, num_pct, color = geography, group = geography)) +geom_line() +geom_point() +labs(title ="Trends in Vaccination Rates (2009-2024): East South Central",y ="Vaccination rate", x ="Year")
mmr_cov %>%filter(geography %in% west_south_central) %>%ggplot(aes(first_year, num_pct, color = geography, group = geography)) +geom_line() +geom_point() +labs(title ="Trends in Vaccination Rates (2009-2024): West South Central",y ="Vaccination rate", x ="Year")
mmr_cov %>%filter(geography %in% mountain) %>%ggplot(aes(first_year, num_pct, color = geography, group = geography)) +geom_line() +geom_point() +labs(title ="Trends in Vaccination Rates (2009-2024): Mountain",y ="Vaccination rate", x ="Year")
mmr_cov %>%filter(geography %in% pacific) %>%ggplot(aes(first_year, num_pct, color = geography, group = geography)) +geom_line() +geom_point() +labs(title ="Trends in Vaccination Rates (2009-2024): Pacific",y ="Vaccination rate", x ="Year")